Set working directory
## [1] "/Users/gabays/github/RiseAndFall"
Load packages
# Attach every package used by the analysis, installing the missing ones first.
# The packages are attached in the same order as before.
for (pkg in c("ggplot2", "roll", "purrr", "stylo", "dplyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  # character.only: pkg holds the package name as a string
  library(pkg, character.only = TRUE)
}
Load external functions
Load previously computed data
Get Metadata and corpus as 3-grams
# Metadata: the first CSV column provides the row names (play identifiers);
# rows are then sorted by identifier. Text columns are read as factors.
metadata <- read.csv(file = "./metadata.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
metadata <- metadata[sort(rownames(metadata)), ]
# Feature tables (the file names suggest n = 3 and n = 1 grams, k = 5000).
# NOTE(review): `data` shadows the base function data(); the name is kept
# because it may be used elsewhere.
data <- read.csv(file = "./feats_tests_n3_k_5000.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
data_stop <- read.csv(file = "./feats_tests_n1_k_5000.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = TRUE)
We compute the distribution: which 3grams are relevant?
We work with the 2000 most frequent 3grams – after that the curve is totally flat
We normalise the vectors
# 3-gram table: transpose (rows become columns), then normalise
d <- normalisations(t(d))
# stop-word table: same treatment
d_stop <- normalisations(t(d_stop))
We add the metadata
# Put the play identifiers of the metadata and the column names of d side by
# side, to check by eye that both follow the same order.
metadata_ids <- rownames(metadata)
feature_ids <- colnames(d)
# deparse.level = 0 keeps the rows unnamed
control <- rbind(metadata_ids, feature_ids, deparse.level = 0)
# one row per play: metadata identifier, then feature-table identifier
head(t(control))## [,1]
## [1,] "abeille-argelie"
## [2,] "abeille-coriolan"
## [3,] "abeille-lyncee"
## [4,] "about-risette"
## [5,] "adenis-homme-qui-ne-peut-pas-siffler"
## [6,] "aigueberre-avare-amoureux"
## [,2]
## [1,] "abeille-argelie.txt"
## [2,] "abeille-coriolan.txt"
## [3,] "abeille-lyncee.txt"
## [4,] "about-risette.txt"
## [5,] "adenis-homme-qui-ne-peut-pas-siffler.txt"
## [6,] "aigueberre-avare-amoureux.txt"
## [,1]
## [1511,] "voltaire-tanis-zelide"
## [1512,] "voltaire-zaire"
## [1513,] "voltaire-zulime"
## [1514,] "vondrebeck-alard-forces-de-l-amour"
## [1515,] "zola-madeleine"
## [1516,] "zola-therese-raquin"
## [,2]
## [1511,] "voltaire-tanis-zelide.txt"
## [1512,] "voltaire-zaire.txt"
## [1513,] "voltaire-zulime.txt"
## [1514,] "vondrebeck-alard-forces-de-l-amour.txt"
## [1515,] "zola-madeleine.txt"
## [1516,] "zola-therese-raquin.txt"
Alternative: loading plays in plain text (for later)
# Read every plain-text play found in the "txt" directory.
# The corpus holds exactly ONE element per file, in the order of TxtFiles:
# the lines of a file are joined with a space, so a play stored on several
# lines is not split into several texts and an empty file still gets a slot.
# This keeps the corpus aligned with TxtFiles.
#Get the list of all txt files
TxtFiles <- list.files(path = "txt", pattern = "txt$")
corpus <- lapply(TxtFiles, function(file_name) {
  # warn = FALSE silences readLines' own warnings (e.g. a missing final
  # end-of-line) without hiding any other warning
  text_lines <- readLines(file.path("txt", file_name), warn = FALSE)
  paste(text_lines, collapse = " ")
})
# View() is only useful in an interactive session
if (interactive()) {
  View(corpus)
}
We transform the loaded texts into minable data
#tokenisation
corpus.tok <- lapply(corpus, txt.to.words2)
#Counting frequency of tokens
corpus.tok.list <- make.frequency.list(corpus.tok)
#Transform frequency into a table
corpus.tok.list.freq <- make.table.of.frequencies(corpus.tok, corpus.tok.list, relative = FALSE)
#Name the rows after the source files
row.names(corpus.tok.list.freq) <- TxtFiles
#I save a copy
write.csv(corpus.tok.list.freq, file = "corpus.bench.tok.list.freq.csv", row.names = TRUE)
#Reload the saved copy: read.csv() already returns a data frame
corpus.tok.list.freq <- read.csv(file = "corpus.bench.tok.list.freq.csv", sep = ",", header = TRUE, row.names = 1, quote = '\"')
#transposition (rows become columns)
corpus.tok.list.freq <- t(corpus.tok.list.freq)
#normalisation
corpus.tok.list.freq <- normalisations(corpus.tok.list.freq)
#Displaying the table (interactive sessions only)
if (interactive()) {
  View(corpus.tok.list.freq)
}
We control that stopwords do identify genres
# Distance of every play to the centroid of the tragedies and to the centroid
# of the comedies dated between 1500 and 1800, computed on the stop-word table.
distToTragedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "tragedy" & metadata[, "Date"] < 1800 & metadata[, "Date"] > 1500]))
distToComedy <- DistToCentroid(d_stop, centroid = rowMeans(d_stop[, metadata[, "Genre"] == "comedy" & metadata[, "Date"] < 1800 & metadata[, "Date"] > 1500]))
m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid", "DistComedyCentroid")
#fix colors: comedies in blue, tragedies in red, every other genre invisible.
# The colour is chosen from the genre NAME, not from the position of the
# genre among the factor levels.
genre_names <- as.character(metadata[, "Genre"])
colors <- ifelse(genre_names == "comedy", "blue",
                 ifelse(genre_names == "tragedy", "red", "transparent"))
# Same drawing for the screen and for the PNG file
draw_genre_clusters <- function() {
  plot(m, col = colors)
  # keyword position: the legend stays inside the plot whatever the axis ranges
  legend("topleft", c("comedy", "tragedy"), col = c("blue", "red"), pch = 1)
  grid(nx = NULL, ny = NULL,
       lty = 1,      # Grid line type
       col = "gray", # Grid line color
       lwd = 1)      # Grid line width
}
#plot
draw_genre_clusters()
# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays
#Save the image
png("./R/images/clusters_stop.png", width = 2500, height = 2000, res = 300)
draw_genre_clusters()
dev.off() ## quartz_off_screen
## 2
We control tragedies classified with comedies:
# Tragedies lying far from the tragedy centroid (distance > 10)
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level codes
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# look the code of "tragedy" up instead of hard-coding its position
tragedy_code <- match("tragedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == tragedy_code & DistTragedyCentroid > 10)## DistTragedyCentroid DistComedyCentroid
## aubignac-pucelle-prose.txt 10.12718 9.878585
## delavigne-famille-temps-luther.txt 10.09809 10.880326
## mathieu-magicienne-etrangere.txt 10.57133 11.581096
## piron-nouvelle-messaline.txt 10.26928 10.859707
## puget-de-la-serre-pandoste-ii.txt 10.57675 10.267505
## puget-de-la-serre-thesee.txt 10.65710 9.905667
## puget-de-la-serre-thomas-morus.txt 10.49597 10.588407
## viau-pyrame.txt 10.17409 11.014445
## literaryGenre
## aubignac-pucelle-prose.txt 10
## delavigne-famille-temps-luther.txt 10
## mathieu-magicienne-etrangere.txt 10
## piron-nouvelle-messaline.txt 10
## puget-de-la-serre-pandoste-ii.txt 10
## puget-de-la-serre-thesee.txt 10
## puget-de-la-serre-thomas-morus.txt 10
## viau-pyrame.txt 10
We control comedies classified with tragedies:
# Comedies lying close to the tragedy centroid (distance < 9)
literaryGenre <- metadata[, "Genre"]
# cbind() stores the genre factor as its integer level codes
check_anomalies <- as.data.frame(cbind(m, literaryGenre))
# look the code of "comedy" up instead of hard-coding its position
comedy_code <- match("comedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == comedy_code & DistTragedyCentroid < 9)## DistTragedyCentroid DistComedyCentroid
## archambault-etrennes.txt 8.433885 8.781233
## chapuzeau-geneve-delivree.txt 8.684980 11.118776
## cinq-auteurs-comedie-des-tuileries.txt 8.769898 10.867372
## colle-alfonse.txt 8.727235 11.825259
## corneillep-illusion-comique.txt 8.564171 11.183531
## corneillep-melite.txt 8.865439 10.693274
## corneillet-geolier-de-sois-meme.txt 7.810412 9.790915
## corneillet-illustres-ennemis.txt 8.454825 10.072041
## cubieres-palmezeaux-lacrymanie.txt 8.516884 11.059902
## labaume-messe-de-gnide.txt 8.183805 10.693331
## moline-legislatrices.txt 8.180516 7.169399
## rotrou-bague-de-l-oubli.txt 8.671355 10.658463
## rotrou-belle-alphrede.txt 8.229971 11.400531
## rotrou-sosies.txt 8.590999 10.306664
## saint-roman-dialogue.txt 8.877755 11.704875
## literaryGenre
## archambault-etrennes.txt 2
## chapuzeau-geneve-delivree.txt 2
## cinq-auteurs-comedie-des-tuileries.txt 2
## colle-alfonse.txt 2
## corneillep-illusion-comique.txt 2
## corneillep-melite.txt 2
## corneillet-geolier-de-sois-meme.txt 2
## corneillet-illustres-ennemis.txt 2
## cubieres-palmezeaux-lacrymanie.txt 2
## labaume-messe-de-gnide.txt 2
## moline-legislatrices.txt 2
## rotrou-bague-de-l-oubli.txt 2
## rotrou-belle-alphrede.txt 2
## rotrou-sosies.txt 2
## saint-roman-dialogue.txt 2
We control that 3-grams do identify genres
# Distance of every play to the centroid of all tragedies and to the centroid
# of all comedies, computed on the 3-gram table.
distToTragedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "tragedy"]))
distToComedy <- DistToCentroid(d, centroid = rowMeans(d[, metadata[, "Genre"] == "comedy"]))
m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid", "DistComedyCentroid")
#fix colors: comedies in blue, tragedies in red, every other genre invisible.
# The colour is chosen from the genre NAME, not from the position of the
# genre among the factor levels.
genre_names <- as.character(metadata[, "Genre"])
colors <- ifelse(genre_names == "comedy", "blue",
                 ifelse(genre_names == "tragedy", "red", "transparent"))
# Same drawing for the screen and for the PNG file
draw_genre_clusters <- function() {
  plot(m, col = colors)
  # keyword position: the legend stays inside the plot whatever the axis ranges
  legend("topleft", c("comedy", "tragedy"), col = c("blue", "red"), pch = 1)
  grid(nx = NULL, ny = NULL,
       lty = 1,      # Grid line type
       col = "gray", # Grid line color
       lwd = 1)      # Grid line width
}
#plot
draw_genre_clusters()
# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays
#Save the image
png("./R/images/clusters_3grams.png", width = 2500, height = 2000, res = 300)
draw_genre_clusters()
dev.off() ## quartz_off_screen
## 2
Results are more precise than with stopwords. Now we do the same, but only with plays written between 1500 and 1800:
# Plays dated strictly between 1500 and 1800 (a missing date counts as outside)
in_period <- !is.na(metadata[, "Date"]) & metadata[, "Date"] > 1500 & metadata[, "Date"] < 1800
#Centroid of tragedies
# which() keeps plays with a missing genre out of the selection
distToTragedy <- DistToCentroid(d, centroid = rowMeans(d[, which(metadata[, "Genre"] == "tragedy" & in_period)]))
#Centroid of comedies
distToComedy <- DistToCentroid(d, centroid = rowMeans(d[, which(metadata[, "Genre"] == "comedy" & in_period)]))
#One table with the two distances
m <- cbind(distToTragedy, distToComedy)
colnames(m) <- c("DistTragedyCentroid", "DistComedyCentroid")
#getting rid of too old/too recent plays
m_clean <- m[in_period, , drop = FALSE]
# The SAME row selection is applied to the distances and to the metadata, so
# that the colours computed below line up with the points of the plot.
keep <- in_period & complete.cases(m)
m_clean2 <- m[keep, , drop = FALSE]
metadata_clean <- metadata[keep, ]
#fix colors: comedies in blue, tragedies in red, every other genre invisible
genre_names <- as.character(metadata_clean[, "Genre"])
colors <- ifelse(genre_names == "comedy", "blue",
                 ifelse(genre_names == "tragedy", "red", "transparent"))
# Same drawing for the screen and for the PNG file
draw_genre_clusters <- function() {
  plot(m_clean2, col = colors)
  # keyword position: the legend stays inside the plot whatever the axis ranges
  legend("topleft", c("comedy", "tragedy"), col = c("blue", "red"), pch = 1)
  grid(nx = NULL, ny = NULL,
       lty = 1,      # Grid line type
       col = "gray", # Grid line color
       lwd = 1)      # Grid line width
}
#plot
draw_genre_clusters()
# text(m[, 1], m[, 2], labels=row.names(m), cex= 0.1) # To add the name of the plays
#Save the image
# NOTE(review): this path is also used for the plot of the whole dataset, so
# the earlier image is overwritten - confirm whether a distinct name is wanted.
png("./R/images/clusters_3grams.png", width = 2500, height = 2000, res = 300)
draw_genre_clusters()
dev.off() ## quartz_off_screen
## 2
We control tragedies classified with comedies:
# Tragedies (1500-1800) lying far from the tragedy centroid (distance > 33)
literaryGenre <- metadata_clean[, "Genre"]
# cbind() stores the genre factor as its integer level codes
check_anomalies <- as.data.frame(cbind(m_clean2, literaryGenre))
# look the code of "tragedy" up instead of hard-coding its position
tragedy_code <- match("tragedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == tragedy_code & DistTragedyCentroid > 33)## DistTragedyCentroid DistComedyCentroid
## bievre-vercingentorixe.txt 33.42857 35.06555
## champrepus-ulysse.txt 35.02691 36.81744
## deshoulieres-mort-de-cochon.txt 34.30683 31.49414
## donneau-de-vise-amours-du-soleil.txt 33.29494 33.16815
## puget-de-la-serre-pandoste-ii.txt 34.99850 34.82156
## puget-de-la-serre-thesee.txt 33.80002 31.19131
## soret-ceciliade.txt 34.31839 34.24210
## literaryGenre
## bievre-vercingentorixe.txt 10
## champrepus-ulysse.txt 10
## deshoulieres-mort-de-cochon.txt 10
## donneau-de-vise-amours-du-soleil.txt 10
## puget-de-la-serre-pandoste-ii.txt 10
## puget-de-la-serre-thesee.txt 10
## soret-ceciliade.txt 10
We control comedies classified with tragedies:
# Comedies (1500-1800) lying close to the tragedy centroid (distance < 32)
literaryGenre <- metadata_clean[, "Genre"]
# cbind() stores the genre factor as its integer level codes
check_anomalies <- as.data.frame(cbind(m_clean2, literaryGenre))
# look the code of "comedy" up instead of hard-coding its position
comedy_code <- match("comedy", levels(literaryGenre))
subset(check_anomalies, literaryGenre == comedy_code & DistTragedyCentroid < 32)## DistTragedyCentroid DistComedyCentroid
## brosse-aveugle-clairvoyant.txt 31.68002 32.72420
## cailleau-tragedies-voltaire.txt 31.78211 26.69976
## colle-alfonse.txt 29.49724 36.12450
## corneillep-melite.txt 31.31384 32.51813
## corneillep-place-royale.txt 31.66732 33.92384
## corneillep-suivante.txt 30.43139 31.51745
## corneillet-charme-de-la-voix.txt 29.28797 30.36489
## corneillet-geolier-de-sois-meme.txt 29.81206 32.83324
## corneillet-illustres-ennemis.txt 29.67439 34.26227
## doruxigne-alzate.txt 28.83935 34.69877
## dumaniant-francais-en-huronie-1787.txt 31.58426 33.95121
## lesage-dorneval-ile-gougou.txt 31.98525 21.88079
## moline-legislatrices.txt 31.09307 32.53820
## ouville-soupcons.txt 30.78827 30.35115
## rotrou-bague-de-l-oubli.txt 31.94664 34.41904
## rotrou-belle-alphrede.txt 29.02633 36.57280
## rotrou-sosies.txt 30.70770 34.52032
## scarron-boutades-matamore.txt 30.97121 30.95656
## scudery-fils-suppose.txt 31.72470 34.40757
## villiers-critique-du-tartuffe.txt 31.88891 29.41916
## literaryGenre
## brosse-aveugle-clairvoyant.txt 2
## cailleau-tragedies-voltaire.txt 2
## colle-alfonse.txt 2
## corneillep-melite.txt 2
## corneillep-place-royale.txt 2
## corneillep-suivante.txt 2
## corneillet-charme-de-la-voix.txt 2
## corneillet-geolier-de-sois-meme.txt 2
## corneillet-illustres-ennemis.txt 2
## doruxigne-alzate.txt 2
## dumaniant-francais-en-huronie-1787.txt 2
## lesage-dorneval-ile-gougou.txt 2
## moline-legislatrices.txt 2
## ouville-soupcons.txt 2
## rotrou-bague-de-l-oubli.txt 2
## rotrou-belle-alphrede.txt 2
## rotrou-sosies.txt 2
## scarron-boutades-matamore.txt 2
## scudery-fils-suppose.txt 2
## villiers-critique-du-tartuffe.txt 2
Warning: this is computed on the whole dataset, not only on plays written between 1500 and 1800.
## [1] "" "comedy" "dialogue" "drama" "farce"
## [6] "monologue" "opera" "proverbe" "saynete" "tragedy"
## [11] "tragicomedy" "vaudeville"
#Get genre of all plays
clusters <- metadata[, "Genre"]
#Compute inertia
# as.numeric() on a factor returns the integer code of each genre, i.e. its
# position among the levels; no relabelling of the levels is needed.
clusterInertia(t(d), as.numeric(clusters))## [1] 198.08189 624.87127 23.60303 30.45398 10.32456 56.62311 11.61778
## [8] 112.83419 21.57262 212.44350 61.53742 5.48289
Comedy (with 3-grams)
# Columns of d belonging to comedies, and their distance to the centroid
is_comedy <- metadata[, "Genre"] == "comedy"
comedies <- d[, is_comedy]
comediesToCentroid <- DistToCentroid(comedies, method = "manhattan")
summary(comediesToCentroid)## DistToCentroid
## Min. :20.97
## 1st Qu.:30.11
## Median :31.50
## Mean :31.36
## 3rd Qu.:32.80
## Max. :37.48
## genlis-belle-et-la-bete.txt lesage-dorneval-ile-gougou.txt
## 20.96861 21.59379
## liborliere-cloison.txt dancourt-mari-retrouve.txt
## 25.91855 26.14891
## palissot-barbier-de-bagdad.txt dancourt-impromptu-de-garnison.txt
## 26.17214 26.27755
## colle-alfonse.txt chapuzeau-geneve-delivree.txt
## 36.09004 36.64682
## corneillep-illusion-comique.txt quinault-comedie-sans-comedie.txt
## 36.64705 36.72005
## rotrou-belle-alphrede.txt colle-cocatrix.txt
## 36.88809 37.47806
Tragedies (with 3-grams)
# Columns of d belonging to tragedies, and their distance to the centroid
is_tragedy <- metadata[, "Genre"] == "tragedy"
tragedies <- d[, is_tragedy]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
summary(tragediesToCentroid)## DistToCentroid
## Min. :18.76
## 1st Qu.:24.87
## Median :26.27
## Mean :26.82
## 3rd Qu.:28.29
## Max. :35.04
## la-thuilerie-soliman.txt barbier-mort-de-cesar.txt saurin-spartacus.txt
## 18.76257 20.31262 21.40985
## genest-zelonide.txt pellegrin-tibere.txt voltaire-mariamne.txt
## 21.86938 22.19090 22.40103
## puget-de-la-serre-thesee.txt deshoulieres-mort-de-cochon.txt
## 33.84736 34.30966
## soret-ceciliade.txt delavigne-famille-temps-luther.txt
## 34.31754 34.70682
## champrepus-ulysse.txt puget-de-la-serre-pandoste-ii.txt
## 35.01097 35.03667
# Evolution over time of the distance to the centroid, for tragedies and for
# comedies dated between 1500 and 1800, on three feature tables: 3-grams (d),
# words (corpus.tok.list.freq) and stop words (d_stop).
# Each statement is on its own line, so every plot is built from the table
# selected just before it.
tragedy_mask <- metadata[, "Genre"] == 'tragedy' & metadata[, "Date"] < 1800 & metadata[, "Date"] > 1500
comedy_mask <- metadata[, "Genre"] == 'comedy' & metadata[, "Date"] < 1800 & metadata[, "Date"] > 1500

# Mean distance per date, with a loess trend
plot_centroid_evolution <- function(evo) {
  ggplot(data = evo, mapping = aes(x = Date, y = DistToCentroid)) +
    geom_point(stat = "summary", fun = "mean") +
    geom_smooth(method = loess, size = 1) + theme_bw()
}

# Tragedies, 3-grams
tragedies <- d[, tragedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[tragedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/tragedy3grams.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# Tragedies, words
tragedies <- corpus.tok.list.freq[, tragedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[tragedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/tragedyWords.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# Tragedies, stop words
tragedies <- d_stop[, tragedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[tragedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/tragedyStop.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# Comedies, 3-grams
# (the variables keep their original "tragedies" names although they hold comedies)
tragedies <- d[, comedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[comedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/comedy3grams.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# Comedies, words
tragedies <- corpus.tok.list.freq[, comedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[comedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/comedyWords.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# Comedies, stop words
tragedies <- d_stop[, comedy_mask]
tragediesToCentroid <- DistToCentroid(tragedies, method = "manhattan")
evoCentroid <- cbind(tragediesToCentroid, metadata[comedy_mask, ])
vizEvoCentroid <- plot_centroid_evolution(evoCentroid)
ggsave("./R/images/comedyStop.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid
We select all the authors with at least 3 plays in the dataset
#all authors, with the number of plays each of them has in the metadata
n_occur <- data.frame(table(metadata$Author))
# Remove the row of the empty author value (selected by its value, so that no
# real author is dropped if the empty value is absent)
n_occur <- n_occur[n_occur$Var1 != "", ]
#authors with at least 3 plays
multiples <- n_occur[n_occur$Freq > 2, ]
#transform df into vector
multiples <- multiples$Var1
#number of authors
length(multiples)## [1] 113
We select all the authors who have at least three comedies
# Authors (among `multiples`) who have at least 3 comedies in the metadata.
# The comedies are counted with sum(), so rows with a missing genre or author
# are not counted as matches.
is_comedy <- metadata$Genre == 'comedy'
candidates <- as.character(multiples)
n_comedies <- vapply(
  candidates,
  function(candidate) sum(is_comedy & metadata$Author == candidate, na.rm = TRUE),
  integer(1)
)
authorsSelected <- candidates[n_comedies > 2]
We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:
# For every comedy dated between 1500 and 1800 and written by a selected
# author, record:
#   distance2author - distance of the play to the centroid of the author's comedies
#   distance2genre  - distance of the play to the centroid of the comedies
#                     dated within 30 years of it
#   distance        - euclidean() of these two values, rounded to 3 digits
#   meanGenre       - mean distance to the centroid within that 30-year window
result_columns <- c("play", "author", "genre", "date", "distance",
                    "distance2author", "distance2genre", "meanGenre")
#get name of plays
plays <- rownames(metadata)
#one slot per play; the slots of skipped plays stay NULL
comedy_rows <- vector("list", length(plays))
#loop over plays
for (incr in seq_along(plays)) {
  x <- plays[incr]
  #author, genre and date are read by column position (2, 4 and 3)
  author <- metadata[incr, 2]
  genre <- metadata[incr, 4]
  date <- metadata[incr, 3]
  #scalar test: a missing genre or date skips the play instead of stopping the loop
  if (isTRUE(author %in% authorsSelected && genre == 'comedy' && date > 1500 && date < 1800)) {
    same_genre <- metadata[, "Genre"] == genre
    #get the data of all the plays of the author in this genre
    # which() ignores rows with missing values; drop = FALSE keeps a
    # one-column selection as a matrix
    authorData <- d[, which(same_genre & metadata[, "Author"] == author), drop = FALSE]
    #get all the plays of the same genre dated within 30 years of the play
    genreData <- d[, which(same_genre & metadata[, "Date"] < date + 30 & metadata[, "Date"] > date - 30), drop = FALSE]
    #compute distance to centroid of the author
    authorToCentroid <- as.data.frame(DistToCentroid(authorData, method = "manhattan"))
    #compute distance to centroid of the genre
    genreToCentroid <- as.data.frame(DistToCentroid(genreData, method = "manhattan"))
    #mean distance to the centroid of the genre
    meanGenre <- mean(as.numeric(genreToCentroid$DistToCentroid))
    #get the distance of the play to the author
    playDistAuthor <- authorToCentroid[x, ]
    #get the distance of the play to the genre
    playDistGenre <- genreToCentroid[x, ]
    #diff author genre (rounded)
    distance <- round(euclidean(playDistGenre, playDistAuthor), digits = 3)
    #distance<-round(playDistGenre-playDistAuthor, digits = 2)
    #save the result only when the play was found among the author's plays
    if (!is.na(playDistAuthor)) {
      # author and genre are factors: c() starting with a character value
      # stores their integer level codes (as text), not their labels.
      comedy_rows[[incr]] <- c(x, author, genre, date, distance, playDistAuthor, playDistGenre, meanGenre)
    }
  }
}
#bind all the rows at once (every column is stored as text)
comedy_rows <- Filter(Negate(is.null), comedy_rows)
if (length(comedy_rows) > 0) {
  df_comedy <- as.data.frame(do.call(rbind, comedy_rows), stringsAsFactors = FALSE)
} else {
  df_comedy <- data.frame(matrix(character(0), ncol = length(result_columns), nrow = 0), stringsAsFactors = FALSE)
}
#give a name to the columns
colnames(df_comedy) <- result_columns
labelPoints <- rownames(df_comedy)
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_comedy)))+
# Distance to the genre over time for the selected comedies.
# Each statement is on its own line, so every plot is assigned to
# vizEvoCentroid, saved, then displayed.

# 1. One colour per author, with the author printed next to each point
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label = author)) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author)) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave("R/images/distance2genre_comedy_labels.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# 2. Same plot without labels and without legend
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label = author)) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() #+ geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_comedy.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid

# 3. One plot per highlighted author: the author's plays in blue, all other
#    points transparent.
# The numbers are the values stored in df_comedy$author; the names are the
# ones used in the image file names.
# NOTE(review): these values look like factor level codes and would change
# with the author list - confirm before reusing on another metadata file.
highlighted_authors <- c(marivaux = 168, voltaire = 148, boissy = 249, moliere = 184)
for (author_name in names(highlighted_authors)) {
  df_comedy$color <- ifelse(as.numeric(df_comedy$author) == highlighted_authors[[author_name]], "blue", "white")
  vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))) +
    geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
    geom_smooth(method = loess, size = 1) +
    xlab("Date") + ylab("Distance to the genre") +
    scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
    theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
  ggsave(paste0("R/images/distance2genre_comedy_", author_name, ".png"), plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
  # explicit print(): plots are not displayed automatically inside a loop
  print(vizEvoCentroid)
}

# 4. The "distance" column (author vs genre) over time
#vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label=rownames(df_comedy)))+
vizEvoCentroid <- ggplot(data = df_comedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance), label = author)) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance") +
  theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/spread_comedy.png", plot = vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroid
Controlling variance
# with 4 plays
# Rolling variance (window of 30 rows) of the "distance" column of df_comedy,
# with a loess trend.
# NOTE(review): the window runs over the rows in their df_comedy order, which
# follows the row order of the metadata - confirm this is the intended ordering.
#x <- 1:71
x <- seq_len(nrow(df_comedy))
y <- roll_var(as.numeric(df_comedy$distance), width = 30, min_obs = 1)
lo <- loess(y ~ x)
# Predict on every index: the trend stays aligned with x even when loess
# dropped rows whose variance is missing.
trend <- predict(lo, newdata = data.frame(x = x))
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = 'red', lwd = 2)
png("./R/images/variance2.png", width = 850, height = 600)
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = 'red', lwd = 2)
dev.off() ## quartz_off_screen
## 2
We select all the authors with at least three plays in the dataset
#all authors, with the number of plays each of them has in the metadata
n_occur <- data.frame(table(metadata$Author))
# Remove the row of the empty author value (selected by its value, so that no
# real author is dropped if the empty value is absent)
n_occur <- n_occur[n_occur$Var1 != "", ]
#authors with at least 3 plays
multiples <- n_occur[n_occur$Freq > 2, ]
#transform df into vector
multiples <- multiples$Var1
#number of authors
length(multiples)## [1] 113
We select all the authors who have at least three tragedies
# Authors (among `multiples`) who have at least 3 tragedies in the metadata.
# The tragedies are counted with sum(), so rows with a missing genre or author
# are not counted as matches.
is_tragedy <- metadata$Genre == 'tragedy'
candidates <- as.character(multiples)
n_tragedies <- vapply(
  candidates,
  function(candidate) sum(is_tragedy & metadata$Author == candidate, na.rm = TRUE),
  integer(1)
)
authorsSelected <- candidates[n_tragedies > 2]
We compute the distance to the centroid of the author, the centroid of the genre and the distance between both:
# For every tragedy dated between 1500 and 1800 and written by a selected
# author, record:
#   distance2author - distance of the play to the centroid of the author's tragedies
#   distance2genre  - distance of the play to the centroid of the tragedies
#                     dated within 30 years of it
#   distance        - euclidean() of these two values, rounded to 3 digits
#   meanGenre       - mean distance to the centroid within that 30-year window
result_columns <- c("play", "author", "genre", "date", "distance",
                    "distance2author", "distance2genre", "meanGenre")
#get name of plays
plays <- rownames(metadata)
#one slot per play; the slots of skipped plays stay NULL
tragedy_rows <- vector("list", length(plays))
#loop over plays
for (incr in seq_along(plays)) {
  x <- plays[incr]
  #author, genre and date are read by column position (2, 4 and 3)
  author <- metadata[incr, 2]
  genre <- metadata[incr, 4]
  date <- metadata[incr, 3]
  #scalar test: a missing genre or date skips the play instead of stopping the loop
  if (isTRUE(author %in% authorsSelected && genre == 'tragedy' && date > 1500 && date < 1800)) {
    same_genre <- metadata[, "Genre"] == genre
    #get the data of all the plays of the author in this genre
    # which() ignores rows with missing values; drop = FALSE keeps a
    # one-column selection as a matrix
    authorData <- d[, which(same_genre & metadata[, "Author"] == author), drop = FALSE]
    #get all the plays of the same genre dated within 30 years of the play
    genreData <- d[, which(same_genre & metadata[, "Date"] < date + 30 & metadata[, "Date"] > date - 30), drop = FALSE]
    #compute distance to centroid of the author
    authorToCentroid <- as.data.frame(DistToCentroid(authorData, method = "manhattan"))
    #compute distance to centroid of the genre
    genreToCentroid <- as.data.frame(DistToCentroid(genreData, method = "manhattan"))
    #mean distance to the centroid of the genre
    meanGenre <- mean(as.numeric(genreToCentroid$DistToCentroid))
    #get the distance of the play to the author
    playDistAuthor <- authorToCentroid[x, ]
    #get the distance of the play to the genre
    playDistGenre <- genreToCentroid[x, ]
    #diff author genre (rounded)
    distance <- round(euclidean(playDistGenre, playDistAuthor), digits = 3)
    #distance<-round(playDistGenre-playDistAuthor, digits = 2)
    #save the result only when the play was found among the author's plays
    if (!is.na(playDistAuthor)) {
      # author and genre are factors: c() starting with a character value
      # stores their integer level codes (as text), not their labels.
      tragedy_rows[[incr]] <- c(x, author, genre, date, distance, playDistAuthor, playDistGenre, meanGenre)
    }
  }
}
#bind all the rows at once (every column is stored as text)
tragedy_rows <- Filter(Negate(is.null), tragedy_rows)
if (length(tragedy_rows) > 0) {
  df_tragedy <- as.data.frame(do.call(rbind, tragedy_rows), stringsAsFactors = FALSE)
} else {
  df_tragedy <- data.frame(matrix(character(0), ncol = length(result_columns), nrow = 0), stringsAsFactors = FALSE)
}
#give a name to the columns
colnames(df_tragedy) <- result_columns
labelPoints <- rownames(df_tragedy)
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
geom_point(stat = "summary", fun = "mean", aes(colour = author))+
geom_smooth(method = loess, size = 1)+
xlab("Date") + ylab("Distance to the genre") +
theme_bw() +geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_labels.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroidvizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=author))+
geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend=FALSE)+
geom_smooth(method = loess, size = 1)+
xlab("Date") + ylab("Distance to the genre") +
theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
vizEvoCentroiddf_tragedy$color<-df_tragedy$author
df_tragedy$color<-as.numeric(df_tragedy$color)
df_tragedy$color[df_tragedy$color == 148] <- "blue"
df_tragedy$color[df_tragedy$color != "blue"] <- "white"
truc<-df_tragedy$color
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre)))+
geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE)+
geom_smooth(method = loess, size = 1)+
xlab("Date") + ylab("Distance to the genre")+
scale_color_manual(values =c("blue"="blue","white"="transparent"))+
theme_bw() #+geom_text(hjust=0, vjust=0, size=3)
ggsave("R/images/distance2genre_tragedy_voltaire.png", plot=vizEvoCentroid, width = 2500, height = 2000, units = "px", dpi = 300)
# Print the current plot object.
vizEvoCentroid
# Highlight a single author: rows whose numeric `author` value equals 131 are
# drawn in blue, every other row is made transparent.
# NOTE(review): 131 is a hard-coded author code; judging by the output file
# name it presumably identifies Crebillon -- confirm against the levels of
# metadata$Author, since the code changes whenever the author list changes.
df_tragedy$color <- ifelse(as.numeric(df_tragedy$author) == 131, "blue", "white")
truc <- df_tragedy$color
# To label points by play instead, add label = rownames(df_tragedy) to aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/distance2genre_tragedy_crebillon.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Highlight a single author: rows whose numeric `author` value equals 185 are
# drawn in blue, every other row is made transparent.
# NOTE(review): 185 is a hard-coded author code; judging by the output file
# name it presumably identifies Racine -- confirm against the levels of
# metadata$Author, since the code changes whenever the author list changes.
df_tragedy$color <- ifelse(as.numeric(df_tragedy$author) == 185, "blue", "white")
truc <- df_tragedy$color
# To label points by play instead, add label = rownames(df_tragedy) to aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/distance2genre_tragedy_racine.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Evolution of `distance` over time, one colour per author, legend hidden.
# To label points by play instead, use label = rownames(df_tragedy) in aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  df_tragedy,
  aes(x = as.numeric(date), y = as.numeric(distance), label = author)
) +
  geom_point(aes(colour = author), stat = "summary", fun = "mean", show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  labs(x = "Date", y = "Distance") +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  filename = "R/images/spread_tragedy.png",
  plot = vizEvoCentroid,
  width = 2500,
  height = 2000,
  units = "px",
  dpi = 300
)
vizEvoCentroid
Controlling variance
# with 4 plays
# x <- 1:71
# Rolling variance of `distance` (window of 30 rows, in row order of
# df_tragedy), with a loess trend drawn on top.
# seq_len() keeps x empty when df_tragedy has no rows (1:0 would give c(1, 0)).
x <- seq_len(nrow(df_tragedy))
y <- roll_var(as.numeric(df_tragedy$distance), width = 30, min_obs = 1)
lo <- loess(y ~ x)
# Evaluate the fit at every index and plot it against x explicitly, so the
# curve stays aligned with the points even if loess() dropped rows where y is
# missing (lines(predict(lo)) would silently re-index from 1).
trend <- predict(lo, newdata = data.frame(x = x))

# On-screen version
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = "red", lwd = 2)

# Same figure written to a PNG file
png("./R/images/variance2.png", width = 850, height = 600)
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = "red", lwd = 2)
dev.off() ## quartz_off_screen
## 2
We select all the authors with at least 3 plays in the dataset (frequency greater than 2)
# Number of plays per author (columns Var1 = author, Freq = count)
n_occur <- data.frame(table(metadata$Author))
# Drop the first row (empty author value)
# NOTE(review): assumes the empty author is the first level -- confirm
n_occur <- n_occur[-1, ]
# Keep, as a vector, the authors that appear more than twice (at least 3 plays)
multiples <- n_occur$Var1[n_occur$Freq > 2]
# Number of authors retained
length(multiples) ## [1] 113
We select all the authors who have at least 3 tragedies (more than 2)
# Among the authors in `multiples`, keep those with more than 2 tragedies.
# The counts are computed in one pass instead of growing a vector in a loop;
# sum(..., na.rm = TRUE) ignores rows where the genre/author comparison is NA
# (row-subsetting with such a mask would have counted them as rows).
is_tragedy <- metadata$Genre == "tragedy"
tragedy_counts <- vapply(
  as.character(multiples),
  function(candidate) {
    sum(is_tragedy & metadata$Author == candidate, na.rm = TRUE)
  },
  integer(1)
)
authorsSelected <- names(tragedy_counts)[tragedy_counts > 2]
We compute the distance to the centroid of the author (with n-grams), the centroid of the genre (with stopwords) and the distance between both.
# Build df_tragedy: one row per tragedy dated strictly between 1500 and 1800
# whose author is in `authorsSelected`. For each such play we store:
#   - distance2author: the play's entry in DistToCentroid() computed on the
#     author's plays of the same genre (features from `d`);
#   - distance2genre: the play's entry in DistToCentroid() computed on the
#     plays of the same genre dated less than 30 years away (features from
#     `d_stop`);
#   - meanGenre: mean of the DistToCentroid column over that genre subset;
#   - distance: euclidean() of the two play distances, rounded to 3 digits.
# Rows are collected in a pre-allocated list and bound once at the end
# (the previous placeholder declared 7 columns for 8 stored values and was
# grown with rbind() at every iteration).
col_names <- c(
  "play", "author", "genre", "date", "distance",
  "distance2author", "distance2genre", "meanGenre"
)
# get name of plays
plays <- rownames(metadata)
rows <- vector("list", length(plays))
# loop over plays; `incr` is the row of the play in `metadata`
for (incr in seq_along(plays)) {
  x <- plays[incr]
  # get author name
  author <- metadata[incr, 2]
  # get genre
  genre <- metadata[incr, 4]
  # get date
  date <- metadata[incr, 3]
  # if author has written multiple tragedies present in the corpus, genre is
  # tragedy and the date is in range (scalar test, hence &&)
  if (author %in% authorsSelected && genre == "tragedy" && date > 1500 && date < 1800) {
    # get the data of all the plays of the author in this genre
    authorData <- d[, metadata[, "Author"] == author & metadata[, "Genre"] == genre]
    # get all the plays of the same genre within a +/- 30 year window
    genreData <- d_stop[, metadata[, "Genre"] == genre &
                          metadata[, "Date"] < date + 30 &
                          metadata[, "Date"] > date - 30]
    # compute distance to centroid of the author
    authorToCentroid <- as.data.frame(DistToCentroid(authorData, method = "manhattan"))
    # compute distance to centroid of the genre
    genreToCentroid <- as.data.frame(DistToCentroid(genreData, method = "manhattan"))
    # compute the mean of the distances to the genre centroid
    meanGenre <- mean(as.numeric(genreToCentroid$DistToCentroid))
    # get the distance of the play to the author (looked up by row name)
    playDistAuthor <- authorToCentroid[x, ]
    # get the distance of the play to the genre (looked up by row name)
    playDistGenre <- genreToCentroid[x, ]
    # diff author genre (rounded)
    distance <- round(euclidean(playDistGenre, playDistAuthor), digits = 3)
    # save the result only when the play was found in the author table
    if (!is.na(playDistAuthor)) {
      # NOTE(review): metadata is read with stringsAsFactors = T, so `author`
      # and `genre` are presumably factors and c() stores their integer level
      # codes rather than their labels. Later code compares `author`
      # numerically, so this encoding is kept as is -- confirm.
      rows[[incr]] <- c(
        x, author, genre, date, distance,
        playDistAuthor, playDistGenre, meanGenre
      )
    }
  }
}
# bind the collected rows; yields a 0-row, 8-column data frame if none matched
rows <- Filter(Negate(is.null), rows)
df_tragedy <- as.data.frame(
  matrix(as.character(unlist(rows)), ncol = length(col_names), byrow = TRUE),
  stringsAsFactors = FALSE
)
# give a name to the columns
x <- col_names
labelPoints <- rownames(df_tragedy)
colnames(df_tragedy) <- x
#vizEvoCentroid <- ggplot(data = df_tragedy, mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label=rownames(df_tragedy)))+
# Distance to the genre centroid over time. Points are summarised with the
# mean, coloured by author, and annotated with the value of `author`.
vizEvoCentroid <- ggplot(
  df_tragedy,
  aes(x = as.numeric(date), y = as.numeric(distance2genre), label = author)
) +
  geom_point(aes(colour = author), stat = "summary", fun = "mean") +
  geom_smooth(method = loess, size = 1) +
  geom_text(hjust = 0, vjust = 0, size = 3) +
  labs(x = "Date", y = "Distance to the genre") +
  theme_bw()
# Write the figure to disk (2500 x 2000 px at 300 dpi).
ggsave(
  filename = "R/images/distance2genre_tragedy_labels.png",
  plot = vizEvoCentroid,
  width = 2500,
  height = 2000,
  units = "px",
  dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Same view of the distance to the genre centroid, without text labels and
# without the colour legend. `label = author` stays in aes() as in the
# original call; the geom_text() layer is left disabled.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre), label = author)
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = author), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/test_distance2genre_tragedy.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Highlight a single author: rows whose numeric `author` value equals 148 are
# drawn in blue, every other row is made transparent.
# NOTE(review): 148 is a hard-coded author code; judging by the output file
# name it presumably identifies Voltaire -- confirm against the levels of
# metadata$Author, since the code changes whenever the author list changes.
df_tragedy$color <- ifelse(as.numeric(df_tragedy$author) == 148, "blue", "white")
truc <- df_tragedy$color
# To label points by play instead, add label = rownames(df_tragedy) to aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/test_distance2genre_tragedy_voltaire.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Highlight a single author: rows whose numeric `author` value equals 131 are
# drawn in blue, every other row is made transparent.
# NOTE(review): 131 is a hard-coded author code; judging by the output file
# name it presumably identifies Crebillon -- confirm against the levels of
# metadata$Author, since the code changes whenever the author list changes.
df_tragedy$color <- ifelse(as.numeric(df_tragedy$author) == 131, "blue", "white")
truc <- df_tragedy$color
# To label points by play instead, add label = rownames(df_tragedy) to aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/test_distance2genre_tragedy_crebillon.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Highlight a single author: rows whose numeric `author` value equals 185 are
# drawn in blue, every other row is made transparent.
# NOTE(review): 185 is a hard-coded author code; judging by the output file
# name it presumably identifies Racine -- confirm against the levels of
# metadata$Author, since the code changes whenever the author list changes.
df_tragedy$color <- ifelse(as.numeric(df_tragedy$author) == 185, "blue", "white")
truc <- df_tragedy$color
# To label points by play instead, add label = rownames(df_tragedy) to aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  data = df_tragedy,
  mapping = aes(x = as.numeric(date), y = as.numeric(distance2genre))
) +
  geom_point(stat = "summary", fun = "mean", aes(colour = color), show.legend = FALSE) +
  geom_smooth(method = loess, size = 1) +
  xlab("Date") + ylab("Distance to the genre") +
  scale_color_manual(values = c("blue" = "blue", "white" = "transparent")) +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  "R/images/test_distance2genre_tragedy_racine.png",
  plot = vizEvoCentroid,
  width = 2500, height = 2000, units = "px", dpi = 300
)
# Print the current plot object.
vizEvoCentroid
# Evolution of `distance` over time, one colour per author (legend shown).
# To label points by play instead, use label = rownames(df_tragedy) in aes()
# and re-enable the geom_text() layer.
vizEvoCentroid <- ggplot(
  df_tragedy,
  aes(x = as.numeric(date), y = as.numeric(distance), label = author)
) +
  geom_point(aes(colour = author), stat = "summary", fun = "mean") +
  geom_smooth(method = loess, size = 1) +
  labs(x = "Date", y = "Distance") +
  theme_bw() # + geom_text(hjust = 0, vjust = 0, size = 3)
ggsave(
  filename = "R/images/test_spread_tragedy.png",
  plot = vizEvoCentroid,
  width = 2500,
  height = 2000,
  units = "px",
  dpi = 300
)
vizEvoCentroid
Controlling variance
# with 4 plays
# x <- 1:71
# Rolling variance of `distance` (window of 30 rows, in row order of
# df_tragedy), with a loess trend drawn on top.
# seq_len() keeps x empty when df_tragedy has no rows (1:0 would give c(1, 0)).
x <- seq_len(nrow(df_tragedy))
y <- roll_var(as.numeric(df_tragedy$distance), width = 30, min_obs = 1)
lo <- loess(y ~ x)
# Evaluate the fit at every index and plot it against x explicitly, so the
# curve stays aligned with the points even if loess() dropped rows where y is
# missing (lines(predict(lo)) would silently re-index from 1).
trend <- predict(lo, newdata = data.frame(x = x))

# On-screen version
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = "red", lwd = 2)

# Same figure written to a PNG file
# NOTE(review): an earlier chunk of this file writes to this same
# variance2.png path, so one figure overwrites the other -- confirm whether a
# distinct file name was intended here.
png("./R/images/variance2.png", width = 850, height = 600)
plot(x, y, xlab = "index", ylab = "variance")
lines(x, trend, col = "red", lwd = 2)
dev.off() ## quartz_off_screen
## 2